- Notifications
You must be signed in to change notification settings - Fork 849
/
Copy pathOML4Py Feature Selection Algorithm-based.dsnb
executable file
·1 lines (1 loc) · 7.62 KB
/
OML4Py Feature Selection Algorithm-based.dsnb
1
[{"layout":null,"template":null,"templateConfig":null,"name":"OML4Py Feature Selection Algorithm-based","description":null,"readOnly":false,"type":"low","paragraphs":[{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":null,"title":null,"message":["%md"," "],"enabled":true,"result":null,"sizeX":0,"hideCode":true,"width":12,"hideResult":true,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":null,"message":["%md","","# OML4Py Feature Selection: Supervised Algorithm","","In this notebook, we demonstrate how to perform feature selection using in-database supervised algorithms via OML4Py.","","We use the customer insurance lifetime value data set which contains customer financial information, lifetime value, and whether or not the customer bought insurance.","","We build a random forest model to predict whether the customer buys insurance or not, then use feature importance to perform feature selection. ","","The dataset `CUSTOMER_INSURANCE_LTV` is generated by the `\"OML Run-me-first\"` notebook, which `MUST` be run before this notebook.","","---","","###### `IMPORTANT`: The `\"OML Run-me-first\"` notebook is available under the menu Templates -> Examples and is a prerequisite to the current notebook.","","---","","","Copyright (c) 2024 Oracle Corporation ","###### <a href=\"https://oss.oracle.com/licenses/upl/\" onclick=\"return ! 
window.open('https://oss.oracle.com/licenses/upl/');\">The Universal Permissive License (UPL), Version 1.0<\/a>","---"],"enabled":true,"result":null,"sizeX":0,"hideCode":true,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"For more information ...","message":["%md ","","* <a href=\"https://docs.oracle.com/en/cloud/paas/autonomous-data-warehouse-cloud/index.html\" target=\"_blank\">Oracle ADB Documentation<\/a>","* <a href=\"https://github.com/oracle-samples/oracle-db-examples/tree/main/machine-learning\" target=\"_blank\">OML folder on Oracle GitHub<\/a>","* <a href=\"https://www.oracle.com/machine-learning\" target=\"_blank\">OML Web Page<\/a>","* <a href=\"https://docs.oracle.com/en/database/oracle/machine-learning/oml4sql/23/dmcon/classification.html#GUID-9F922514-0F8D-42F5-BEB1-F59A09FA1CD2\" target=\"_blank\">OML Classification<\/a>","* <a href=\"https://oracle.com/goto/ml-random-forest\" target=\"_blank\">OML Random Forest<\/a>","* <a href=\"https://docs.oracle.com/en/database/oracle/machine-learning/oml4py/2/mlpug/random-forest.html\" target=\"_blank\">OML4Py Random Forest Documentation<\/a>"],"enabled":true,"result":null,"sizeX":0,"hideCode":true,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"Import libraries and set display options","message":["%python","","import warnings","warnings.filterwarnings('ignore')","","import pandas as pd","import 
oml"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"table","title":"Display a few rows from CUSTOMER_INSURANCE_LTV table","message":["%python","","CUST_INSUR_LTV_DF = oml.sync(table ='CUSTOMER_INSURANCE_LTV')","z.show(CUST_INSUR_LTV_DF.head(10))"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"Create Train and Test (60/40 split) datasets","message":["%python","","TRAIN, TEST = CUST_INSUR_LTV_DF.split(ratio = (0.6,0.4))","TRAIN_X = TRAIN.drop('BUY_INSURANCE')","TRAIN_Y = TRAIN['BUY_INSURANCE']","TEST_X = TEST","TEST_Y = TEST['BUY_INSURANCE']"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"table","title":"Apply Attribute Importance algorithm to obtain the attribute importance","message":["%python","","setting = {'ODMS_SAMPLING':'ODMS_SAMPLING_DISABLE'}","ai_mod = oml.ai(**setting)","ai_mod = ai_mod.fit(TRAIN_X, TRAIN_Y, case_id = 'CUSTOMER_ID')","z.show(ai_mod.importance.head(10).round(4))"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"bar","title":"Plot the top 10 most important 
attributes","message":["%python","","z.show(ai_mod.importance.head(10).round(4))"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":"[{\"raw\":{\"height\":300,\"lastColumns\":[],\"version\":1}}]","hideInIFrame":false,"selectedVisualization":"raw","title":"Build a Random Forest Model to predict who will purchase insurance","message":["%python","","settings = dict()","rf_mod = oml.rf(**settings)","rf_mod.fit(TRAIN_X, TRAIN_Y, case_id = 'CUSTOMER_ID')"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"table","title":"Check the attribute importance from the trained random forest model","message":["%python","","z.show(rf_mod.importance.sort_values('ATTRIBUTE_IMPORTANCE', ascending = False).head(10).round(4))"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"table","title":"Plot the top 10 most important features","message":["%python","","z.show(rf_mod.importance.sort_values('ATTRIBUTE_IMPORTANCE', ascending = False).head(10))"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":null,"message":["%md","","# End of 
Script"],"enabled":true,"result":null,"sizeX":0,"hideCode":true,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":null,"message":["%md"],"enabled":true,"result":null,"sizeX":0,"hideCode":true,"width":12,"hideResult":true,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"}],"version":"6","snapshot":false,"tags":null}]